package com.chance.cc.crawler.development.bootstrap.tencent.qqcarnews;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRecord;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.development.controller.DevCrawlerController;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.junit.Test;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Filter;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.filter;

/**
 * Development entry point that builds a start request for the auto.qq.com channel page and
 * runs it through a {@code DevCrawlerController} configured with the
 * {@code QQCarCrawlerScript} script class.
 *
 * <p>Author: zhouzeng. Created: 2021/10/09.
 */
public class QQCarStart {
    private static final String DOMAIN = "tenxun";
    private static final String SITE = "car";

    /** Channel page name that is spliced into the start URL. */
    private static final String START_MODULE = "newcar";

    // NOTE(review): presumably look-back windows expressed in hours (7 days / 1 day) —
    // confirm against FilterUtils.dateRangeFilterInfo.
    private static final int ARTICLE_DATE_RANGE = 24 * 7;
    private static final int COMMENT_DATE_RANGE = 24;

    /** Local file handed to the controller's file result pipeline. */
    private static final String RESULT_LOG_PATH = "F:\\changxi\\web_data\\qqcar.log";

    /** Fully qualified name of the crawler script the controller is built with. */
    private static final String SCRIPT_CLASS =
            "com.chance.cc.crawler.development.scripts.tenxun.qqcarnews.QQCarCrawlerScript";

    /**
     * Creates the start request, decorates it with business tags and starts the dev controller.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        CrawlerRequestRecord startRecord =
                createStartRecord("https://auto.qq.com/" + START_MODULE + ".htm");
        attachBizTags(startRecord);

        DevCrawlerController.builder()
                .triggerInfo(DOMAIN, DOMAIN, System.currentTimeMillis(), DOMAIN)
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN))
                .fileResultPipeline(RESULT_LOG_PATH, false)
                .consoleResultPipeline()
                .requestRecord(startRecord)
                .build(SCRIPT_CLASS)
                .start();
    }

    /**
     * Builds the turn-page start request for the given URL. The URL doubles as the record key,
     * the release time is the current system time, and the record is labelled with the
     * article, interaction and comment result types.
     *
     * @param startUrl page URL the crawl starts from
     * @return the built request record
     */
    private static CrawlerRequestRecord createStartRecord(String startUrl) {
        return CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, CrawlerEnum.CrawlerRequestType.turnPage)
                .domain(DOMAIN)
                .httpUrl(startUrl)
                .recordKey(startUrl)
                .releaseTime(System.currentTimeMillis())
                .filter(CrawlerEnum.CrawlerRecordFilter.keyOrDateRange)
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(DOMAIN))
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(ARTICLE_DATE_RANGE, null))
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .build();
    }

    /**
     * Adds the domain, site, module tags and the serialized comment filter to the record's
     * business tags.
     *
     * @param record request record whose business tags are extended
     */
    private static void attachBizTags(CrawlerRequestRecord record) {
        record.tagsCreator().bizTags().addDomain(DOMAIN);
        record.tagsCreator().bizTags().addSite(SITE);

        // Module tags distinguish different sites by functional module. The list is
        // intentionally left empty here; the candidate values that are currently disabled
        // are "guide", "evaluat", "tech" and "news".
        List<String> moduleTags = new ArrayList<>();
        record.tagsCreator().bizTags().addCustomKV("moduleTags", moduleTags);

        String commentFilter = serializedCommentFilter();
        record.tagsCreator().bizTags().addCustomKV("comment_record_filter_info", commentFilter);
    }

    /**
     * Builds the filter record used for the latest comments and serializes it to JSON.
     *
     * @return JSON text of a date-range filter record carrying its own memory filter key
     */
    private static String serializedCommentFilter() {
        // "filter" is the statically imported CrawlerEnum.CrawlerRequestType.filter constant.
        String memoryKey = StringUtils.joinWith("-", filter, DOMAIN, "comment");

        CrawlerRecord commentFilterRecord = new CrawlerRecord();
        commentFilterRecord.setFilter(CrawlerEnum.CrawlerRecordFilter.dateRange);
        commentFilterRecord.addFilterInfo(FilterUtils.dateRangeFilterInfo(COMMENT_DATE_RANGE, null));
        commentFilterRecord.addFilterInfo(FilterUtils.memoryFilterKeyInfo(memoryKey));
        return JSON.toJSONString(commentFilterRecord);
    }
}
